# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from zc_core.util.http_util import retry_request
from chinapost.util.login import SeleniumLogin
from chinapost.rules import *
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Spider that requests the goods-detail page of every SKU in the pool.

    Each detail page is parsed for the product item and for the first page of
    its sale (order) records; further order pages are requested one at a time
    for as long as the order parser asks for a next page and the page count
    allows it.

    NOTE(review): ``parse_item_data``, ``parse_order_item``,
    ``parse_total_page`` and ``OrderItemFilter`` are used as bare module-level
    names; they presumably come from ``from chinapost.rules import *``.
    ``self.batch_no`` and ``self.error_back`` are presumably provided by
    ``BaseSpider`` -- confirm against the base class.
    """

    name = 'full'
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        # 'DOWNLOAD_DELAY': 0.3,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }

    # Common URLs. Placeholders: sku id, order page number, order page size.
    item_url = 'https://cgwzgy.11185.cn/oscp/goods/product/goodsDetail.do?saleRecordFlag=1&sn={}&salePeriod=1&pageNumber_saleRecord={}&pageSize_saleRecord={}'

    # Browser-like headers sent with every goods-detail request.
    _item_headers = {
        'Host': 'cgwzgy.11185.cn',
        'Referer': 'https://cgwzgy.11185.cn/oscp/home/mainTc.html',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and its per-batch helpers.

        :param batchNo: batch number forwarded to ``BaseSpider``; may be None.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Used to avoid crawling the same SKU twice within a batch.
        self.done_filter = DoneFilter(self.batch_no)
        # Order collection cut-off filter (only constructed here).
        self.order_filter = OrderItemFilter()
        # Number of order records requested per page.
        self.order_page_size = 10

    def _build_item_request(self, sku_id, page_no, cookies, callback, priority):
        """Build the goods-detail request for one SKU and one order page.

        :param sku_id: SKU identifier substituted into ``item_url``.
        :param page_no: order page number to request.
        :param cookies: cookies to attach to the request.
        :param callback: spider method that parses the response.
        :param priority: Scrapy request priority.
        :return: a ``Request`` that bypasses the duplicate filter.
        """
        return Request(
            url=self.item_url.format(sku_id, page_no, self.order_page_size),
            cookies=cookies,
            callback=callback,
            errback=self.error_back,
            meta={
                'reqType': 'item',
                'batchNo': self.batch_no,
                'orderPageSize': self.order_page_size,
                'pageNo': page_no,
                'skuId': sku_id,
            },
            # Fresh copy so the shared class-level dict is never handed out.
            headers=dict(self._item_headers),
            priority=priority,
            dont_filter=True
        )

    def _process_order_page(self, response, next_priority):
        """Yield the orders found in ``response`` and, if needed, the next page.

        :param response: goods-detail response; ``meta`` must carry ``skuId``
            and ``pageNo``.
        :param next_priority: priority given to the follow-up page request.
        """
        meta = response.meta
        sku_id = meta.get('skuId')
        curr_page = meta.get('pageNo')

        order_list, need_next_page = parse_order_item(response)
        if not order_list:
            self.logger.info('无单: sku=%s, page=%s', sku_id, curr_page)
            return

        self.logger.info('订单: sku=%s, page=%s, cnt=%s', sku_id, curr_page, len(order_list))
        for order in order_list:
            yield order

        if not need_next_page:
            return
        # Total number of order pages; only read when another page is wanted.
        total_page = parse_total_page(response)
        next_page = curr_page + 1
        if total_page >= next_page:
            # Fetch the next page of orders, reusing this request's cookies.
            yield self._build_item_request(
                sku_id,
                next_page,
                response.request.cookies,
                self.parse_order_item,
                next_priority,
            )

    def start_requests(self):
        """Yield the first goods-detail request for every eligible SKU.

        SKUs are skipped when their ``offlineTime`` exceeds the
        ``MAX_OFFLINE_TIME`` setting (default 2), or when they are already in
        the done filter and ``FORCE_RECOVER`` is not set.
        """
        # NOTE(review): hard-coded session cookies stand in for
        # ``SeleniumLogin().get_cookies()``; they look like a debugging
        # leftover and should not live in source control -- confirm and
        # restore the login call.
        cookies = {'JSESSIONID': '71713859B13383CBEE1CEE1C66DBE3F5',
                   'centralSessionId': '49756d0c-83df-4702-8d84-f5399d5336ea'}
        if not cookies:
            self.logger.error('init cookie failed...')
            return
        self.logger.info('init cookie: %s', cookies)

        pool_list = SkuPoolDao().get_sku_pool_list()
        self.logger.info('全量：%s', len(pool_list))
        random.shuffle(pool_list)

        # Settings do not change during the loop, so read them once.
        settings = get_project_settings()
        max_offline_time = settings.get('MAX_OFFLINE_TIME', 2)
        force_recover = settings.get('FORCE_RECOVER', False)

        for sku in pool_list:
            sku_id = sku.get('_id')
            # Treat a missing or null offlineTime as 0 so the comparison
            # below cannot fail on None.
            offline_time = sku.get('offlineTime') or 0
            if offline_time > max_offline_time:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Avoid crawling the same SKU twice.
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: [%s]', sku_id)
                continue

            yield self._build_item_request(sku_id, 1, cookies, self.parse_item_data, 250)

    # Handle the goods-detail page (item data + first page of orders).
    def parse_item_data(self, response):
        """Yield the product item (if on sale) and the orders on this page."""
        meta = response.meta

        # Product. The bare name resolves to the module-level parser, not to
        # this method.
        data = parse_item_data(response)
        if data and 'supplierSkuId' in data:
            self.logger.info('商品: [%s]', data.get('skuId'))
            yield data
        else:
            self.logger.info('下架: [%s]', meta.get('skuId'))

        # Orders.
        for result in self._process_order_page(response, 300):
            yield result

    # Handle follow-up order pages.
    def parse_order_item(self, response):
        """Yield the orders on a follow-up page and request the next one."""
        for result in self._process_order_page(response, 500):
            yield result
